package ink.charon.novel.core.processor;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;

import java.util.HashSet;
import java.util.Set;

/**
 * Page processor that writes the HTML of every page it receives to standard
 * output and supplies the crawl settings (charset, user agent, retries,
 * sleep time, timeout, accepted status codes) used for the site.
 */
public class TestProcessor implements PageProcessor {

    /**
     * Prints the HTML of the given page to standard output.
     *
     * @param page the page handed to this processor
     */
    @Override
    public void process(Page page) {
        Object html = page.getHtml();
        System.out.println(html);
    }

    /**
     * Builds the crawl configuration for the target site: encoding, user
     * agent, retry count, sleep time, timeout and the accepted status codes.
     * A new {@link Site} is created on every call.
     *
     * @return the configured site settings
     */
    @Override
    public Site getSite() {
        // Status codes registered as accepted; besides 200 this includes several
        // error codes (presumably so those responses are still processed).
        int[] statusCodes = {414, 404, 200, 503, 403, 407, 432};
        Set<Integer> acceptedStatuses = new HashSet<>();
        for (int statusCode : statusCodes) {
            acceptedStatuses.add(statusCode);
        }

        // Crawl-site configuration, including encoding, crawl interval, retry count, etc.
        Site crawlConfig = Site.me()
                .setCharset("UTF-8")
                .setUserAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36")
                .setRetryTimes(3)
                .setSleepTime(0)
                .setTimeOut(10000);
        crawlConfig.setAcceptStatCode(acceptedStatuses);
        return crawlConfig;
    }
}
